###########################################
###### THIS FILE EXTENDS THE  #############
###### ANALYSIS IN DE LA O AJPS 2012 ######
###########################################

### Read data 
setwd("/Users/asad/Dropbox/Mexico CCTs/Data/DeLaO Original Replication/")
setwd ("/Users/allabaranovsky/Dropbox/Mexico CCTs/Data/DeLaO Original Replication/")
setwd("/Users/ALICE/Dropbox/Mexico CCTs/Data/DeLaO Original Replication/")
#install.packages("foreign")
#install.packages("AER")
require(foreign)
require(AER)
library("apsrtable")
install.packages("Amelia")
require("Amelia")
mexico <- read.dta("DeLaO_AJPS2013_rep_file.dta")

library("xtable", lib.loc="/Users/ALICE/Documents/R Packages/")
install.packages("stargazer")
library(stargazer)


library("AER", lib.loc="/Users/ALICE/Documents/R Packages/")
require(AER)

library("ivpack", lib.loc="/Users/ALICE/Documents/R Packages/")
require(ivpack)


##########################################################################
############ TAKE OUT TWO OUTLIER PRECINCTS ###################
##########################################################################

summary(mexico$pobtot1994)

mexico3 <- mexico[mexico$pobtot1994 < 88200,]


table3.1aOutlier <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                 + votos_totales1994 + pri1994 + pan1994 
                 + prd1994 + factor(villages), 
                 data=mexico3)

table3.1bOutlier <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                 + votos_totales1994 + pri1994 + pan1994 
                 + prd1994 + factor(villages), 
                 data=mexico3)


table3.1cOutlier <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                 + votos_totales1994 + pri1994 + pan1994 
                 + prd1994 + factor(villages), 
                 data=mexico3)

table3.1dOutlier <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                 + votos_totales1994 + pri1994 + pan1994 
                 + prd1994 + factor(villages), 
                 data=mexico3)


stargazer(table3.1aOutlier, table3.1bOutlier, table3.1cOutlier, table3.1dOutlier)


###### OVERALL TABLE 3B (IV Model) with mexico3 (Two Outlier Precincts dropped) #############


table3.2aOutlier <- ivreg(t2000 ~ treatment + 
                      avgpoverty +  pobtot1994 + votos_totales1994 + 
                      pri1994 + pan1994 + prd1994 + factor(villages)|
                      early_progresa_p + avgpoverty + pobtot1994 + 
                      votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                      factor(villages), 
                    data=mexico3)

table3.2bOutlier <- ivreg(pri2000s ~ treatment + 
                      avgpoverty + pobtot1994 + votos_totales1994 + 
                      pri1994 + pan1994 + prd1994 + factor(villages)|
                      early_progresa_p + avgpoverty + pobtot1994 + 
                      votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                      factor(villages), 
                    data=mexico3)

table3.2cOutlier <- ivreg(pan2000s ~ treatment + 
                      avgpoverty + pobtot1994 + votos_totales1994 + 
                      pri1994 + pan1994 + prd1994 + factor(villages)|
                      early_progresa_p + avgpoverty + pobtot1994 + 
                      votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                      factor(villages), 
                    data=mexico3)

table3.2dOutlier <- ivreg(prd2000s ~ treatment + 
                      avgpoverty + pobtot1994 + votos_totales1994 + 
                      pri1994 + pan1994 + prd1994 + factor(villages)|
                      early_progresa_p + avgpoverty + pobtot1994 + 
                      votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                      factor(villages), 
                    data=mexico3)

stargazer(table3.2aOutlier, table3.2bOutlier, table3.2cOutlier, table3.2dOutlier)


##########################################################################
############ TAKE OUT TURNOUT > 1.00 - mexico2 dataset ###################
##########################################################################


mexico2 <- mexico[mexico$t1994<=1,]
mexico2 <- mexico2[mexico2$t2000<=1,]

mexico2$opp2000s <- mexico2$pan2000s + mexico2$prd2000s # Opposition share 
mexico2$opp1994s <- mexico2$pan1994s + mexico2$prd1994s # Opposition share 




########## OVERALL TABLE 3A with mexico2 (Turnout <= 1.00) ###############

## No significant effects of treatment on turnout, govt share, or on opp share. 

table3.1aT <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                         + votos_totales1994 + pri1994 + pan1994 
                         + prd1994 + factor(villages), 
                         data=mexico2)

table3.1bT <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                         + votos_totales1994 + pri1994 + pan1994 
                         + prd1994 + factor(villages), 
                         data=mexico2)


table3.1cT <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                        + votos_totales1994 + pri1994 + pan1994 
                        + prd1994 + factor(villages), 
                        data=mexico2)

table3.1dT <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                        + votos_totales1994 + pri1994 + pan1994 
                        + prd1994 + factor(villages), 
                        data=mexico2)


apsrtable(table3.1aT, table3.1bT, table3.1cT, table3.1dT, digits=3)

stargazer(table3.1aT, table3.1bT, table3.1cT, table3.1dT)

###### OVERALL TABLE 3B with mexico2 (Turnout <= 1.00) #############


table3.2aT <- ivreg(t2000 ~ treatment + 
                             avgpoverty +  pobtot1994 + votos_totales1994 + 
                             pri1994 + pan1994 + prd1994 + factor(villages)|
                            early_progresa_p + avgpoverty + pobtot1994 + 
                             votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                             factor(villages), 
                           data=mexico2)

table3.2bT <- ivreg(pri2000s ~ treatment + 
                             avgpoverty + pobtot1994 + votos_totales1994 + 
                             pri1994 + pan1994 + prd1994 + factor(villages)|
                            early_progresa_p + avgpoverty + pobtot1994 + 
                             votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                             factor(villages), 
                           data=mexico2)

table3.2cT <- ivreg(pan2000s ~ treatment + 
                             avgpoverty + pobtot1994 + votos_totales1994 + 
                             pri1994 + pan1994 + prd1994 + factor(villages)|
                              early_progresa_p + avgpoverty + pobtot1994 + 
                             votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                             factor(villages), 
                           data=mexico2)

table3.2dT <- ivreg(prd2000s ~ treatment + 
                             avgpoverty + pobtot1994 + votos_totales1994 + 
                             pri1994 + pan1994 + prd1994 + factor(villages)|
                            early_progresa_p + avgpoverty + pobtot1994 + 
                             votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                             factor(villages), 
                           data=mexico2)

stargazer(table3.2aT, table3.2bT, table3.2cT, table3.2dT)
summary(table3.2aT) # IV effect on turnout goes away 
summary(table3.2bT) # IV effect on govt vote goes away 
summary(table3.2cT) # Still no IV effect on opp votes 
summary(table3.2dT)


##### HETEROGENOUS - POVERTY ##########################


summary(mexico2$avgpoverty) # Median poverty is 4.75 - use as cutoff

nrow(mexico2[mexico2$avgpoverty<=4.75,])
nrow(mexico2[mexico2$avgpoverty>4.75,])

## High poverty (>4.75)

### Absolutely zero effect on turnout. Positive but insignificant effect 
### on govt vote share. Significantly negative effect on opposition vote 
### share. This is one of our main story-turners. 


########################################################
##REVISED (USED IN PAPER) HETEROGENEOUS EFFECTS W/ IV INSTRUMENT
########################################################

##Dropping both categories of outliers:
mexico4 <- mexico2[mexico2$pobtot1994 < 88200,]

## Low poverty (< 4.75) --*Note: Results not significant

tbl.LPiv <- ivreg(t2000 ~ treatment +  pobtot1994 + votos_totales1994 + 
                      pri1994 + pan1994 + prd1994 + factor(villages)|
                    early_progresa_p + pobtot1994 + 
                      votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                      factor(villages), 
                    data=mexico4[mexico4$avgpoverty<4.75,])

summary(tbl.LPiv)

tbl.LPivPRI <- ivreg(pri2000s ~ treatment +  pobtot1994 + votos_totales1994 + 
                    pri1994 + pan1994 + prd1994 + factor(villages)|
                    early_progresa_p + pobtot1994 + 
                    votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                    factor(villages), 
                  data=mexico4[mexico4$avgpoverty<4.75,])

summary(tbl.LPivPRI)


tbl.LPivPAN <- ivreg(pan2000s ~ treatment +  pobtot1994 + votos_totales1994 + 
                       pri1994 + pan1994 + prd1994 + factor(villages)|
                       early_progresa_p + pobtot1994 + 
                       votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                       factor(villages), 
                     data=mexico4[mexico4$avgpoverty<4.75,])

summary(tbl.LPivPAN)


tbl.LPivPRD <- ivreg(prd2000s ~ treatment +  pobtot1994 + votos_totales1994 + 
                       pri1994 + pan1994 + prd1994 + factor(villages)|
                       early_progresa_p + pobtot1994 + 
                       votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                       factor(villages), 
                     data=mexico4[mexico4$avgpoverty<4.75,])

summary(tbl.LPivPRD)


tbl.LPivOPP <- ivreg(opp2000s ~ treatment +  pobtot1994 + votos_totales1994 + 
                       pri1994 + pan1994 + prd1994 + factor(villages)|
                       early_progresa_p + pobtot1994 + 
                       votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                       factor(villages), 
                     data=mexico4[mexico4$avgpoverty<4.75,])

summary(tbl.LPivOPP)


stargazer(tbl.LPiv, tbl.LPivPRI, tbl.LPivPAN, tbl.LPivPRD, tbl.LPivOPP)



########################################################
##High Poverty > 4.75
########################################################

table3.HPols <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                 + votos_totales1994 + pri1994 + pan1994 
                 + prd1994 + factor(villages), 
                 data=mexico4[mexico4$avgpoverty > 4.75,])

table3.HPolsPRI <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                 + votos_totales1994 + pri1994 + pan1994 
                 + prd1994 + factor(villages), 
                 data=mexico4[mexico4$avgpoverty > 4.75,])


table3.HPolsPAN <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                 + votos_totales1994 + pri1994 + pan1994 
                 + prd1994 + factor(villages), 
                 data=mexico4[mexico4$avgpoverty>4.75,])

table3.HPolsPRD <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                 + votos_totales1994 + pri1994 + pan1994 
                 + prd1994 + factor(villages), 
                 data=mexico4[mexico4$avgpoverty>4.75,])

table3.HPolsOPP <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico4[mexico4$avgpoverty>4.75,])

stargazer(table3.HPols, table3.HPolsPRI, table3.HPolsPAN, table3.HPolsPRD, table3.HPolsOPP)




tbl.HPiv <- ivreg(t2000 ~ treatment +  pobtot1994 + votos_totales1994 + 
                    pri1994 + pan1994 + prd1994 + factor(villages)|
                    early_progresa_p + pobtot1994 + 
                    votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                    factor(villages), 
                  data=mexico4[mexico4$avgpoverty>4.75,])

summary(tbl.HPiv)

tbl.HPivPRI <- ivreg(pri2000s ~ treatment +  pobtot1994 + votos_totales1994 + 
                       pri1994 + pan1994 + prd1994 + factor(villages)|
                       early_progresa_p + pobtot1994 + 
                       votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                       factor(villages), 
                     data=mexico4[mexico4$avgpoverty>4.75,])

summary(tbl.HPivPRI)


tbl.HPivPAN <- ivreg(pan2000s ~ treatment +  pobtot1994 + votos_totales1994 + 
                       pri1994 + pan1994 + prd1994 + factor(villages)|
                       early_progresa_p + pobtot1994 + 
                       votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                       factor(villages), 
                     data=mexico4[mexico4$avgpoverty>4.75,])

summary(tbl.HPivPAN)


tbl.HPivPRD <- ivreg(prd2000s ~ treatment +  pobtot1994 + votos_totales1994 + 
                       pri1994 + pan1994 + prd1994 + factor(villages)|
                       early_progresa_p + pobtot1994 + 
                       votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                       factor(villages), 
                     data=mexico4[mexico4$avgpoverty>4.75,])

summary(tbl.HPivPRD)


tbl.HPivOPP <- ivreg(opp2000s ~ treatment +  pobtot1994 + votos_totales1994 + 
                       pri1994 + pan1994 + prd1994 + factor(villages)|
                       early_progresa_p + pobtot1994 + 
                       votos_totales1994 + pri1994 + pan1994 + prd1994 + 
                       factor(villages), 
                     data=mexico4[mexico4$avgpoverty>4.75,])

summary(tbl.HPivOPP)


stargazer(tbl.HPiv, tbl.HPivPRI, tbl.HPivPAN, tbl.HPivPRD, tbl.HPivOPP)

########################################################
##REVISED PRECINCT SIZE W/ IV: HIGH REGISTERED VOTERS
########################################################

###High total registered voters


table3.HVols <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                   + votos_totales1994 + pri1994 + pan1994 
                   + prd1994 + factor(villages), 
                   data=mexico[mexico$votos_totales1994 > 370,])

table3.HVolsPRI <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico[mexico$votos_totales1994 > 370,])


table3.HVolsPAN <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico[mexico$votos_totales1994 > 370,])

table3.HVolsPRD <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico[mexico$votos_totales1994 > 370,])

table3.HVolsOPP <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico[mexico$votos_totales1994 > 370,])

stargazer(table3.HVols, table3.HVolsPRI, table3.HVolsPAN, table3.HVolsPRD, table3.HVolsOPP)



table3.HViv <- ivreg(t2000 ~ treatment + avgpoverty + pobtot1994 
                     + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                     + avgpoverty + pobtot1994 
                     + pri1994 + pan1994 + prd1994 + factor(villages), 
                  data=mexico4[mexico4$votos_totales1994 > 370,])

summary(table3.HViv)

table3.HVivPRI <- ivreg(pri2000s ~ treatment + avgpoverty + pobtot1994 
                     + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                     + avgpoverty + pobtot1994 
                     + pri1994 + pan1994 + prd1994 + factor(villages), 
                     data=mexico4[mexico4$votos_totales1994 > 370,])

summary(table3.HVivPRI)

table3.HVivPAN <- ivreg(pan2000s ~ treatment + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                        + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages), 
                        data=mexico4[mexico4$votos_totales1994 > 370,])

summary(table3.HVivPAN)


table3.HVivPRD <- ivreg(prd2000s ~ treatment + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                        + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages), 
                        data=mexico4[mexico4$votos_totales1994 > 370,])

summary(table3.HVivPRD)


table3.HVivOPP <- ivreg(opp2000s ~ treatment + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                        + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages), 
                        data=mexico4[mexico4$votos_totales1994 > 370,])

summary(table3.HVivOPP)


stargazer(table3.HViv, table3.HVivPRI, table3.HVivPAN, table3.HVivPRD, tbl.HPivOPP)

### NOT SIGNIFICANT


########################################################
##REVISED PRECINCT SIZE W/ IV: LOW REGISTERED VOTERS
########################################################

table3.LVols <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                   + votos_totales1994 + pri1994 + pan1994 
                   + prd1994 + factor(villages), 
                   data=mexico4[mexico4$votos_totales1994 < 370,])

table3.LVolsPRI <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico4[mexico4$votos_totales1994 < 370,])


table3.LVolsPAN <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico4[mexico4$votos_totales1994 < 370,])

table3.LVolsPRD <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico4[mexico4$votos_totales1994 < 370,])

table3.LVolsOPP <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                      + votos_totales1994 + pri1994 + pan1994 
                      + prd1994 + factor(villages), 
                      data=mexico4[mexico4$votos_totales1994 < 370,])

stargazer(table3.LVols, table3.LVolsPRI, table3.LVolsPAN, table3.LVolsPRD, table3.LVolsOPP)



table3.LViv <- ivreg(t2000 ~ treatment + avgpoverty + pobtot1994 
                     + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                     + avgpoverty + pobtot1994 
                     + pri1994 + pan1994 + prd1994 + factor(villages), 
                     data=mexico2[mexico2$votos_totales1994 < 370,])

summary(table3.LViv)

table3.LVivPRI <- ivreg(pri2000s ~ treatment + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                        + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages), 
                        data=mexico2[mexico2$votos_totales1994 < 370,])

summary(table3.LVivPRI)

table3.LVivPAN <- ivreg(pan2000s ~ treatment + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                        + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages), 
                        data=mexico2[mexico2$votos_totales1994 < 370,])

summary(table3.LVivPAN)


table3.LVivPRD <- ivreg(prd2000s ~ treatment + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                        + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages), 
                        data=mexico2[mexico2$votos_totales1994 < 370,])

summary(table3.LVivPRD)


table3.LVivOPP <- ivreg(opp2000s ~ treatment + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages) | early_progresa_p
                        + avgpoverty + pobtot1994 
                        + pri1994 + pan1994 + prd1994 + factor(villages), 
                        data=mexico2[mexico2$votos_totales1994 < 370,])

summary(table3.LVivOPP)


stargazer(table3.LViv, table3.LVivPRI, table3.LVivPAN, table3.LVivPRD, tbl.HPivOPP)

################### 1994 VOTE SHARE 



##### PRECINTS WITH ABOVE AVERAGE OPPOSITION SUPPORT 
##### IN 1994

summary(mexico3$pri2000s)

table3.1aOS <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp2000s<=0.24,])

table3.1bOS <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp2000s<=0.24,])

summary(table3.1eOS)

table3.1cOS <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp2000s<=0.24,])


table3.1dOS <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp2000s<=0.24,])

table3.1eOS <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp2000s<=0.24,])

apsrtable(table3.1aOS, table3.1bOS, table3.1cOS, table3.1dOS, table3.1eOS,
          digits=3)

##### TABLE 4-A for T<=1.00

mexico3 <- mexico

### Diagnostics to check if 3 parties vote shares add up to explain crazy 
### turn out values 

mexico3 <- mexico[order(mexico$t1994),]

mexico3$alls1994 <- mexico3$pri1994s + mexico3$pan1994s + mexico3$prd1994s

mexico3$alls1994t <- mexico3$alls1994/mexico3$t1994

## PLOT 

plot(mexico3$t1994[mexico3$t1994>1.00],ylim=c(0,6))
points(mexico3$pan1994s[mexico3$t1994>1.00], col="red")
points(mexico3$prd1994s[mexico3$t1994>1.00], col="blue")
points(mexico3$pri1994s[mexico3$t1994>1.00], col="green")
points(mexico3$alls1994[mexico3$t1994>1.00], col="green")
points(mexico3$alls1994t[mexico3$t1994>1.00], col="purple")
abline(h=1)

summary(mexico3$alls1994t)

######################################################################
### FIX TURNOUT VALUES AND PARTY SHARES IN THOSE PRECINTS ############
######################################################################

mexico3$pan1994s <- ifelse(mexico3$t1994>1, 
                           mexico3$pan1994s/mexico3$t1994, 
                           mexico3$pan1994s)

mexico3$prd1994s <- ifelse(mexico3$t1994>1, 
                           mexico3$prd1994s/mexico3$t1994, 
                           mexico3$prd1994s)

mexico3$pri1994s <- ifelse(mexico3$t1994>1, 
                           mexico3$pri1994s/mexico3$t1994, 
                           mexico3$pri1994s)

mexico3$pan2000s <- ifelse(mexico3$t2000>1, 
                           mexico3$pan2000s/mexico3$t2000, 
                           mexico3$pan2000s)

mexico3$prd2000s <- ifelse(mexico3$t2000>1, 
                           mexico3$prd2000s/mexico3$t2000, 
                           mexico3$prd2000s)

mexico3$pri2000s <- ifelse(mexico3$t2000>1, 
                           mexico3$pri2000s/mexico3$t2000, 
                           mexico3$pri2000s)

mexico3$t1994 <- ifelse(mexico3$t1994>1, 1, mexico3$t1994)

mexico3$t2000 <- ifelse(mexico3$t2000>1, 1, mexico3$t2000)

mexico3$opp1994s <- mexico3$pan1994s + mexico3$prd1994s

mexico3$opp2000s <- mexico3$pan2000s + mexico3$prd2000s

## High poverty index 

table3.1aLP2 <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$avgpoverty>4.75,])

table3.1bLP2 <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$avgpoverty>4.75,])

table3.1cLP2 <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$avgpoverty>4.75,])

table3.1dLP2 <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$avgpoverty>4.75,])

table3.1eLP2 <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$avgpoverty>4.75,])

apsrtable(table3.1aLP2, table3.1bLP2, table3.1cLP2, table3.1dLP2, table3.1eLP2,
          digits=3)


##### PRECINTS WITH ABOVE AVERAGE OPPOSITION SUPPORT 
##### IN 1994

summary(mexico3$pri1994s)

table3.1aOS <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s>=0.45,])

table3.1bOS <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s>=0.45,])

summary(table3.1eOS)

table3.1cOS <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s>=0.45,])


table3.1dOS <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s>=0.45,])

table3.1eOS <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s>=0.45,])

apsrtable(table3.1aOS, table3.1bOS, table3.1cOS, table3.1dOS, table3.1eOS,
          digits=3)


##### PRECINTS WITH BELOW AVERAGE OPPOSITION SUPPORT 
##### IN 1994

summary(mexico3$pri1994s)

table3.1aOS <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s<=0.45,])

table3.1bOS <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s<=0.45,])

summary(table3.1eOS)

table3.1cOS <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s<=0.45,])


table3.1dOS <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s<=0.45,])

table3.1eOS <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$opp1994s<=0.45,])

apsrtable(table3.1aOS, table3.1bOS, table3.1cOS, table3.1dOS, table3.1eOS,
          digits=3)

##### PRECINTS WITH ABOVE AVERAGE INCUMBENT SUPPORT 
##### IN 1994

summary(mexico3$pri1994s)

table3.1aOS <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s>=0.39,])

table3.1bOS <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s>=0.39,])

summary(table3.1eOS)

table3.1cOS <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s>=0.39,])


table3.1dOS <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s>=0.39,])

table3.1eOS <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s>=0.39,])
summary(table3.1eOS)
apsrtable(table3.1aOS, table3.1bOS, table3.1cOS, table3.1dOS, table3.1eOS,
          digits=3)

##### PRECINTS WITH BELOW AVERAGE INCUMBENT SUPPORT 
##### IN 1994



table3.1aOS <- lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s<=0.39,])

table3.1bOS <- lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s<=0.39,])

summary(table3.1eOS)

table3.1cOS <- lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s<=0.39,])


table3.1dOS <- lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s<=0.39,])

table3.1eOS <- lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                  + votos_totales1994 + pri1994 + pan1994 
                  + prd1994 + factor(villages), 
                  data=mexico3[mexico3$pri1994s<=0.39,])

summary(table3.1eOS)

apsrtable(table3.1aOS, table3.1bOS, table3.1cOS, table3.1dOS, table3.1eOS,
          digits=3)


######################################################################
### AMELIA - ADDRESSING MISSING DATA ############
######################################################################


#install.packages("Amelia")
library("Amelia")

mexico.am <- read.dta("DeLaO_AJPS2013_rep_file.dta")


## Change categorical variables into factors 

mexico.am$treatment <- as.factor(mexico.am$treatment)
mexico.am$numerotreated <- as.factor(mexico.am$numerotreated)
mexico.am$numerocontrol <- as.factor(mexico.am$numerocontrol)
mexico.am$one_random <- as.factor(mexico.am$one_random)
mexico.am$two_random <- as.factor(mexico.am$two_random)
mexico.am$villages <- as.factor(mexico.am$villages)
mexico.am$villages2 <- as.factor(mexico.am$villages2)

## Drop ID variables / variables we don't know about 


mexico.am$cve_entIFE <- mexico.am$cve_mpioIFE <- 
  mexico.am$seccion <- NULL

## Drop village dummies (perfectly collinear) 

mexico.am <- mexico.am[c(-28:-40)]

## Check for collinearity 

install.packages("Hmisc")
library(Hmisc)

mexico.num <- mexico.am[,c(2,3,4,10,13:28)]

corR <- rcorr(as.matrix(mexico.num), type="spearman")

## pobtot1994 highly corr with some of the vote counts 

## some of the rep counts highly correlated with turnout or vote counts 


## Tell Amelia which variables are nominal or ordinal categorical 

nom <- c("treatment","one_random","two_random")

ord <- c("villages")

## Just to see if it works, drop numerotreated and numerocontrol 

mexico.am <- mexico.am[c(-5:-6)]

names(mexico.am)

mexico.am <- mexico.am[c(-9)] # drop villages 2 - mostly NA's and quite useless


# run a multiple imputation, making 5 new imputed data sets

a.out <- amelia(mexico.am, 
                m = 5, 
                p2s = 1, 
                noms = nom, 
                ords = ord
)

# run a multiple imputation, making 5 new imputed data sets, using a ridge prior 

a.out1 <- amelia(mexico.am, 
                m = 5, 
                p2s = 1, 
                noms = nom, 
                ords = ord, 
                empri = .01*nrow(mexico.am)
)

missmap(a.out)


######################################################################
### OLS with Amelia
######################################################################


OLS.ameliaT <- lapply(a.out$imputations, function(i) lm(t2000 ~ treatment + avgpoverty + pobtot1994 
                                                       + votos_totales1994 + pri1994 + pan1994 
                                                       + prd1994, 
                                                        data = i))
summary(OLS.ameliaT$imp1)
coef(summary(OLS.ameliaT$imp1))[,1]
summary(OLS.ameliaT$imp2)
summary(OLS.ameliaT$imp3)
summary(OLS.ameliaT$imp4)
summary(OLS.ameliaT$imp5)

coef(summary(i))[,  4]

amelia.OLScoefT <- do.call(rbind, lapply(OLS.ameliaT, function(i) coef(summary(i))[,1]))
amelia.OLSseT <- do.call(rbind, lapply(OLS.ameliaT, function(i) coef(summary(i))[,2]))
amelia.OLSpvalT <- do.call(rbind, lapply(OLS.ameliaT, function(i) coef(summary(i))[,4]))

do.call(rbind, lapply(OLS.ameliaT, function(i) coef(summary(i))[,4]))

amelia.OLSturnout <- mi.meld(amelia.OLScoefT, amelia.OLSseT, amelia.OLSpvalT)
amelia.OLSturnout

OLS.ameliaPRI <- lapply(a.out$imputations, function(i) lm(pri2000s ~ treatment + avgpoverty + pobtot1994 
                                                          + votos_totales1994 + pri1994 + pan1994 
                                                          + prd1994, 
                                                          data = i))

OLS.ameliaPRI$imp1
OLS.ameliaPRI$imp2
OLS.ameliaPRI$imp3
OLS.ameliaPRI$imp4
OLS.ameliaPRI$imp5

amelia.OLScoefPRI <- do.call(rbind, lapply(OLS.ameliaPRI, function(i) coef(summary(i))[,1]))
amelia.OLSsePRI <- do.call(rbind, lapply(OLS.ameliaPRI, function(i) coef(summary(i))[,2]))
amelia.OLSpvalPRI <- do.call(rbind, lapply(OLS.ameliaPRI, function(i) coef(summary(i))[,4]))

amelia.OLSpri <- mi.meld(amelia.OLScoefPRI, amelia.OLSsePRI, amelia.OLSpvalPRI)
amelia.OLSpri


OLS.ameliaPAN <- lapply(a.out$imputations, function(i) lm(pan2000s ~ treatment + avgpoverty + pobtot1994 
                                                        + votos_totales1994 + pri1994 + pan1994 
                                                          + prd1994, 
                                                          data = i))

OLS.ameliaPAN$imp1
OLS.ameliaPAN$imp2
OLS.ameliaPAN$imp3
OLS.ameliaPAN$imp4
OLS.ameliaPAN$imp5

amelia.OLScoefPAN <- do.call(rbind, lapply(OLS.ameliaPAN, function(i) coef(summary(i))[,1]))
amelia.OLSsePAN <- do.call(rbind, lapply(OLS.ameliaPAN, function(i) coef(summary(i))[,2]))
amelia.OLSpvalPAN <- do.call(rbind, lapply(OLS.ameliaPAN, function(i) coef(summary(i))[,4]))

amelia.OLSpan <- mi.meld(amelia.OLScoefPAN, amelia.OLSsePAN, amelia.OLSpvalPAN)
amelia.OLSpan

OLS.ameliaPRD <- lapply(a.out$imputations, function(i) lm(prd2000s ~ treatment + avgpoverty + pobtot1994 
                                                          + votos_totales1994 + pri1994 + pan1994 
                                                          + prd1994, 
                                                          data = i))

OLS.ameliaPRD$imp1
OLS.ameliaPRD$imp2
OLS.ameliaPRD$imp3
OLS.ameliaPRD$imp4
OLS.ameliaPRD$imp5

amelia.OLScoefPRD <- do.call(rbind, lapply(OLS.ameliaPRD, function(i) coef(summary(i))[,1]))
amelia.OLSsePRD <- do.call(rbind, lapply(OLS.ameliaPRD, function(i) coef(summary(i))[,2]))
amelia.OLSpvalPRD <- do.call(rbind, lapply(OLS.ameliaPRD, function(i) coef(summary(i))[,4]))

amelia.OLSprd <- mi.meld(amelia.OLScoefPRD, amelia.OLSsePRD, amelia.OLSpvalPRD)
amelia.OLSprd

turnout.OLSam <- cbind(amelia.OLSturnout$q.mi, amelia.OLSturnout$se.mi)
xtable(turnout.OLSam)

stargazer(amelia.OLSturnout)

stargazer(amelia.OLSpan)


OLS.ameliaOPP <- lapply(a.out$imputations, function(i) lm(opp2000s ~ treatment + avgpoverty + pobtot1994 
                                                          + votos_totales1994 + pri1994 + pan1994 
                                                          + prd1994, 
                                                          data = i))

OLS.ameliaPRD$imp1
OLS.ameliaPRD$imp2
OLS.ameliaPRD$imp3
OLS.ameliaPRD$imp4
OLS.ameliaPRD$imp5

######################################################################
### IV Model with Amelia
######################################################################


IV.ameliaT <- lapply(a.out$imputations, function(i) ivreg(t2000 ~ treatment + 
                       avgpoverty +  pobtot1994 + votos_totales1994 + 
                       pri1994 + pan1994 + prd1994|
                       early_progresa_p + avgpoverty + pobtot1994 + 
                       votos_totales1994 + pri1994 + pan1994 + prd1994, 
                       data = i))

summary(IV.ameliaT$imp1)
summary(IV.ameliaT$imp2)
summary(IV.ameliaT$imp3)
summary(IV.ameliaT$imp4)
summary(IV.ameliaT$imp5)
                         
amelia.IVcoefT <- do.call(rbind, lapply(IV.ameliaT, function(i) coef(summary(i))[,1]))
amelia.IVseT <- do.call(rbind, lapply(IV.ameliaT, function(i) coef(summary(i))[,2]))
amelia.IVpvalT <- do.call(rbind, lapply(IV.ameliaT, function(i) coef(summary(i))[,4]))

amelia.IVt <- mi.meld(amelia.IVcoefT, amelia.IVseT, amelia.IVpvalT)
amelia.IVt


IV.ameliaPRI <- lapply(a.out$imputations, function(i) ivreg(pri2000s ~ treatment + 
                                                            avgpoverty +  pobtot1994 + votos_totales1994 + 
                                                            pri1994 + pan1994 + prd1994|
                                                            early_progresa_p + avgpoverty + pobtot1994 + 
                                                            votos_totales1994 + pri1994 + pan1994 + prd1994, 
                                                          data = i))

amelia.IVcoefpri <- do.call(rbind, lapply(IV.ameliaPRI, function(i) coef(summary(i))[,1]))
amelia.IVsepri <- do.call(rbind, lapply(IV.ameliaPRI, function(i) coef(summary(i))[,2]))
amelia.IVpvalpri <- do.call(rbind, lapply(IV.ameliaPRI, function(i) coef(summary(i))[,4]))

amelia.IVpri <- mi.meld(amelia.IVcoefpri, amelia.IVsepri, amelia.IVpvalpri)
amelia.IVpri

summary(IV.ameliaPRI$imp1)
summary(IV.ameliaPRI$imp2)
summary(IV.ameliaPRI$imp3)
summary(IV.ameliaPRI$imp4)
summary(IV.ameliaT$imp5)

IV.ameliaPAN <- lapply(a.out$imputations, function(i) ivreg(pan2000s ~ treatment + 
                                                              avgpoverty +  pobtot1994 + votos_totales1994 + 
                                                              pri1994 + pan1994 + prd1994|
                                                              early_progresa_p + avgpoverty + pobtot1994 + 
                                                              votos_totales1994 + pri1994 + pan1994 + prd1994, 
                                                            data = i))

amelia.IVcoefpan <- do.call(rbind, lapply(IV.ameliaPAN, function(i) coef(summary(i))[,1]))
amelia.IVsepan <- do.call(rbind, lapply(IV.ameliaPAN, function(i) coef(summary(i))[,2]))
amelia.IVpvalpan <- do.call(rbind, lapply(IV.ameliaPAN, function(i) coef(summary(i))[,4]))

amelia.IVpan <- mi.meld(amelia.IVcoefpan, amelia.IVsepan, amelia.IVpvalpan)
amelia.IVpan


summary(IV.ameliaPAN$imp1)
summary(IV.ameliaPAN$imp2)
summary(IV.ameliaPAN$imp3)
summary(IV.ameliaPAN$imp4)
summary(IV.ameliaPAN$imp5)

IV.ameliaPRD <- lapply(a.out$imputations, function(i) ivreg(prd2000s ~ treatment + 
                                                              avgpoverty +  pobtot1994 + votos_totales1994 + 
                                                              pri1994 + pan1994 + prd1994|
                                                              early_progresa_p + avgpoverty + pobtot1994 + 
                                                              votos_totales1994 + pri1994 + pan1994 + prd1994, 
                                                            data = i))

amelia.IVcoefprd <- do.call(rbind, lapply(IV.ameliaPRD, function(i) coef(summary(i))[,1]))
amelia.IVseprd <- do.call(rbind, lapply(IV.ameliaPRD, function(i) coef(summary(i))[,2]))
amelia.IVpvalprd <- do.call(rbind, lapply(IV.ameliaPRD, function(i) coef(summary(i))[,4]))

amelia.IVprd <- mi.meld(amelia.IVcoefpan, amelia.IVsepan, amelia.IVpvalpan)
amelia.IVprd

summary(IV.ameliaPRD$imp1)
summary(IV.ameliaPRD$imp2)
summary(IV.ameliaPRD$imp3)
summary(IV.ameliaPRD$imp4)
summary(IV.ameliaPRD$imp5)

